{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reverse-geocode lat-long to census geography\n",
    "\n",
    "Using https://geocoding.geo.census.gov/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import time, pandas as pd, requests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def census_geocode(lon, lat, benchmark='Public_AR_Current', vintage='Current_Current',\n",
    "                   output_format='json', layers='Census Tracts'):\n",
    "    \n",
    "    url = ('https://geocoding.geo.census.gov/geocoder/geographies/coordinates?'\n",
    "           'benchmark={benchmark}&vintage={vintage}&x={lon}&y={lat}&format={output_format}&layers={layers}')\n",
    "    \n",
    "    prepared_url = url.format(benchmark=benchmark, vintage=vintage, lon=lon, lat=lat, \n",
    "                              output_format=output_format, layers=layers)\n",
    "\n",
    "    response = requests.get(prepared_url)\n",
    "    return response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def geocode_row(row):\n",
    "\n",
    "    response = census_geocode(lon=row['longitude'], lat=row['latitude'])\n",
    "    result = response.json()['result']\n",
    "    tract = result['geographies']['Census Tracts'][0]\n",
    "    \n",
    "    return pd.Series(tract)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the data and reverse geocode it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load the dataset of rental listings\n",
    "listings = pd.read_csv('data/listings.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# reverse geocode each listing's lat-long to tract\n",
    "tracts = listings.apply(lambda row: geocode_row(row), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>GEOID</th>\n",
       "      <th>AREALAND</th>\n",
       "      <th>AREAWATER</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>06083001304</td>\n",
       "      <td>5090828</td>\n",
       "      <td>144119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>51153900701</td>\n",
       "      <td>2702493</td>\n",
       "      <td>43468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>24031705000</td>\n",
       "      <td>4552733</td>\n",
       "      <td>14831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>06073016301</td>\n",
       "      <td>1068848</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22063040806</td>\n",
       "      <td>69992596</td>\n",
       "      <td>1310597</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         GEOID  AREALAND  AREAWATER\n",
       "0  06083001304   5090828     144119\n",
       "1  51153900701   2702493      43468\n",
       "2  24031705000   4552733      14831\n",
       "3  06073016301   1068848          0\n",
       "4  22063040806  69992596    1310597"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# look at the tracts we got back\n",
    "tracts[['GEOID', 'AREALAND', 'AREAWATER']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pid</th>\n",
       "      <th>date</th>\n",
       "      <th>region</th>\n",
       "      <th>neighborhood</th>\n",
       "      <th>rent</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>sqft</th>\n",
       "      <th>rent_sqft</th>\n",
       "      <th>rent_sqft_cat</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>GEOID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4454264047</td>\n",
       "      <td>2014-05-11</td>\n",
       "      <td>santabarbara</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>2.916667</td>\n",
       "      <td>5</td>\n",
       "      <td>-119.726987</td>\n",
       "      <td>34.399757</td>\n",
       "      <td>06083001304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4468128892</td>\n",
       "      <td>2014-05-13</td>\n",
       "      <td>washingtondc</td>\n",
       "      <td>14300 Jeffries Rd Ste 1207</td>\n",
       "      <td>1099.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>775.0</td>\n",
       "      <td>1.418065</td>\n",
       "      <td>4</td>\n",
       "      <td>-77.268300</td>\n",
       "      <td>38.635600</td>\n",
       "      <td>51153900701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4474037012</td>\n",
       "      <td>2014-05-16</td>\n",
       "      <td>washingtondc</td>\n",
       "      <td>Bethesda</td>\n",
       "      <td>2743.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>714.0</td>\n",
       "      <td>3.841737</td>\n",
       "      <td>5</td>\n",
       "      <td>-77.102200</td>\n",
       "      <td>39.000300</td>\n",
       "      <td>24031705000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4482003715</td>\n",
       "      <td>2014-05-21</td>\n",
       "      <td>sandiego</td>\n",
       "      <td>escondido</td>\n",
       "      <td>995.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>614.0</td>\n",
       "      <td>1.620521</td>\n",
       "      <td>4</td>\n",
       "      <td>-116.950989</td>\n",
       "      <td>32.807693</td>\n",
       "      <td>06073016301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4488888498</td>\n",
       "      <td>2014-05-26</td>\n",
       "      <td>batonrouge</td>\n",
       "      <td>Denham Springs</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>0.909091</td>\n",
       "      <td>2</td>\n",
       "      <td>-90.942121</td>\n",
       "      <td>30.438018</td>\n",
       "      <td>22063040806</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          pid        date        region                neighborhood    rent  \\\n",
       "0  4454264047  2014-05-11  santabarbara                         NaN  3500.0   \n",
       "1  4468128892  2014-05-13  washingtondc  14300 Jeffries Rd Ste 1207  1099.0   \n",
       "2  4474037012  2014-05-16  washingtondc                    Bethesda  2743.0   \n",
       "3  4482003715  2014-05-21      sandiego                   escondido   995.0   \n",
       "4  4488888498  2014-05-26    batonrouge              Denham Springs  1000.0   \n",
       "\n",
       "   bedrooms    sqft  rent_sqft  rent_sqft_cat   longitude   latitude  \\\n",
       "0       3.0  1200.0   2.916667              5 -119.726987  34.399757   \n",
       "1       2.0   775.0   1.418065              4  -77.268300  38.635600   \n",
       "2       1.0   714.0   3.841737              5  -77.102200  39.000300   \n",
       "3       1.0   614.0   1.620521              4 -116.950989  32.807693   \n",
       "4       3.0  1100.0   0.909091              2  -90.942121  30.438018   \n",
       "\n",
       "         GEOID  \n",
       "0  06083001304  \n",
       "1  51153900701  \n",
       "2  24031705000  \n",
       "3  06073016301  \n",
       "4  22063040806  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge the listings with the tracts' geoids\n",
    "geoids = pd.DataFrame(tracts['GEOID'])\n",
    "listings_geoids = pd.concat([listings, geoids], axis=1)\n",
    "listings_geoids.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
